{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "431302f5-74b8-43fe-b057-0b4dd387faaa",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import statsmodels.formula.api as smf\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "89f53311-378e-47b1-97c6-a6a2c45efe5e",
   "metadata": {},
   "source": [
    "## 1. Retuls with coeff plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "17300804-4d40-4773-9e40-b40f0b8d70cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 0) PATH\n",
    "# ============================================================\n",
    "DATA_DIR = r\"E:\\\"\n",
    "DATA_FILE = os.path.join(DATA_DIR, \"df.xlsx\")\n",
    "\n",
    "OUT_DIR = os.path.join(DATA_DIR, \"_results\")\n",
    "os.makedirs(OUT_DIR, exist_ok=True)\n",
    "\n",
    "df = pd.read_excel(DATA_FILE)\n",
    "\n",
    "# ============================================================\n",
    "# 1) Variables\n",
    "# ============================================================\n",
    "P = \"vignette_presence_P\"\n",
    "S = \"vignette_support_S\"\n",
    "\n",
    "DVs = {\n",
    "    \"Legitimacy\": \"dv_legitimacy_1to7\",\n",
    "    \"Employer responsibility\": \"dv_employer_responsibility_1to7\",\n",
    "    \"Need for protection\": \"dv_protection_need_1to7\",\n",
    "    \"Social essentiality\": \"dv_social_essentiality_1to7\",\n",
    "    \"Compensation support\": \"dv_compensation_support_1to7\"\n",
    "}\n",
    "\n",
    "# ---- Norm\n",
    "norm_items = [\"mr_right_1to7\",\"mr_outdated_1to7\",\"mr_legit_1to7\",\"mr_no_reason_1to7\",\"mr_standard_1to7\"]\n",
    "if \"mr_perk_rev_1to7\" in df.columns:\n",
    "    df[\"mr_perk_rev_reversed\"] = 8 - df[\"mr_perk_rev_1to7\"]\n",
    "    norm_items += [\"mr_perk_rev_reversed\"]\n",
    "df[\"norm_index\"] = df[norm_items].mean(axis=1)\n",
    "df[\"norm_c\"] = df[\"norm_index\"] - df[\"norm_index\"].mean()\n",
    "\n",
    "# ---- Remote feasibility\n",
    "df[\"remote_feas\"] = df[\"job_remote_feas_0to10\"] / 10.0\n",
    "df[\"remote_feas\"] = df[\"remote_feas\"].fillna(df[\"remote_feas\"].mean())\n",
    "df[\"remote_c\"] = df[\"remote_feas\"] - df[\"remote_feas\"].mean()\n",
    "\n",
    "# ============================================================\n",
    "# 2) Controls (MAIN)\n",
    "# ============================================================\n",
    "CONTROL_TERMS_MAIN = [\n",
    "    \"age\",\n",
    "    \"C(gender)\",\n",
    "    \"C(region)\",\n",
    "    \"C(education)\",\n",
    "    \"C(monthly_income_band)\",\n",
    "    \"C(homeownership)\",\n",
    "    \"C(marital_status)\",\n",
    "    \"C(union_member)\",\n",
    "    \"C(employment_type)\",\n",
    "    \"C(firm_size)\",\n",
    "    \"ideology_0to10\"\n",
    "]\n",
    "CONTROLS_MAIN = \" + \".join(CONTROL_TERMS_MAIN)\n",
    "\n",
    "# ============================================================\n",
    "# 3) Helpers\n",
    "# ============================================================\n",
    "def fit_ols(formula, data):\n",
    "    return smf.ols(formula, data=data).fit(cov_type=\"HC3\")\n",
    "\n",
    "def short_term_name(term):\n",
    "    return (term.replace(P, \"P\")\n",
    "                .replace(S, \"S\")\n",
    "                .replace(\"norm_c\", \"Norm\")\n",
    "                .replace(\"remote_c\", \"RemoteFeas\")\n",
    "                .replace(\":\", \"×\"))\n",
    "\n",
    "def extract_terms(model, term_list):\n",
    "    rows = []\n",
    "    for t in term_list:\n",
    "        if t in model.params.index:\n",
    "            rows.append((t,\n",
    "                         float(model.params[t]),\n",
    "                         float(model.bse[t]),\n",
    "                         float(model.pvalues[t])))\n",
    "    return rows\n",
    "\n",
    "def coefpanel(ax, rows, panel_title, xlab=None, panel_tag=\"\"):\n",
    "    if not rows:\n",
    "        ax.axis(\"off\")\n",
    "        return\n",
    "\n",
    "    y = np.arange(len(rows))\n",
    "    coefs = np.array([r[1] for r in rows])\n",
    "    ses   = np.array([r[2] for r in rows])\n",
    "\n",
    "    ci_low  = coefs - 1.96*ses\n",
    "    ci_high = coefs + 1.96*ses\n",
    "\n",
    "    # CI & points\n",
    "    ax.hlines(y, ci_low, ci_high, color=\"black\", linewidth=2)\n",
    "    ax.plot(coefs, y, \"o\", color=\"red\", markersize=9)\n",
    "\n",
    "    # zero line\n",
    "    ax.axvline(0, color=\"red\", linestyle=\"--\", linewidth=1.6)\n",
    "\n",
    "    labels = [short_term_name(r[0]) for r in rows]\n",
    "    ax.set_yticks(y)\n",
    "    ax.set_yticklabels(labels, fontsize=20)  \n",
    "    ax.invert_yaxis()\n",
    "\n",
    "    xmin, xmax = min(ci_low.min(), 0), max(ci_high.max(), 0)\n",
    "    span = xmax - xmin\n",
    "    ax.set_xlim(xmin - 0.2*span, xmax + 0.2*span)\n",
    "\n",
    "    dx = 0.04 * span\n",
    "    for i, (t, coef, se, p) in enumerate(rows):\n",
    "        txt = f\"{coef:.2f}, p={p:.3f}\"\n",
    "        x_text = coef + (dx if coef >= 0 else -dx)\n",
    "  #      ax.text(x_text, i+0.006, txt, va=\"bottom\", fontsize=16)  \n",
    "\n",
    "        ax.annotate(\n",
    "            txt,\n",
    "            xy=(x_text, i),\n",
    "            xytext=(0, 3),              \n",
    "            textcoords=\"offset points\",\n",
    "            ha=\"left\",\n",
    "            va=\"bottom\",\n",
    "            fontsize=15\n",
    "        )\n",
    "\n",
    "\n",
    "    \n",
    "    ax.set_title(f\"{panel_tag} {panel_title}\", fontsize=22, pad=12)\n",
    "    ax.tick_params(axis=\"x\", labelsize=17, pad=6)\n",
    "    ax.tick_params(axis=\"y\", labelsize=20, pad=7)\n",
    "\n",
    "    if xlab:\n",
    "        ax.set_xlabel(xlab, fontsize=18, labelpad=10)\n",
    "\n",
    "# ============================================================\n",
    "# 4) Main Figures (H1–H3, no triple)\n",
    "# ============================================================\n",
    "SHOW_PxS = False\n",
    "\n",
    "for dv_label, dv_col in DVs.items():\n",
    "    m1 = fit_ols(f\"{dv_col} ~ {P}*{S} + {CONTROLS_MAIN}\", df)\n",
    "    rows_h1 = extract_terms(m1, [P, S] + ([f\"{P}:{S}\"] if SHOW_PxS else []))\n",
    "\n",
    "    m2 = fit_ols(f\"{dv_col} ~ {P}*{S} + norm_c + {P}:norm_c + {CONTROLS_MAIN}\", df)\n",
    "    rows_h2 = extract_terms(m2, [f\"{P}:norm_c\"])\n",
    "\n",
    "    m3 = fit_ols(f\"{dv_col} ~ {P}*{S} + remote_c + {P}:remote_c + {CONTROLS_MAIN}\", df)\n",
    "    rows_h3 = extract_terms(m3, [f\"{P}:remote_c\"])\n",
    "\n",
    "    fig, axes = plt.subplots(1, 3, figsize=(22, 6.2), constrained_layout=True)\n",
    "    fig.subplots_adjust(wspace=0.55)\n",
    "#    fig.suptitle(f\"{dv_label}: Main Results (H1–H3)\", fontsize=18, y=1.03)\n",
    "\n",
    "    coefpanel(axes[0], rows_h1, \"H1 (ATE): P, S\", \"Coefficient (95% CI)\", \"(a)\")\n",
    "    coefpanel(axes[1], rows_h2, \"H2: Presence × Norm\", \"Coefficient (95% CI)\", \"(b)\")\n",
    "    coefpanel(axes[2], rows_h3, \"H3: Presence × RemoteFeas\", \"Coefficient (95% CI)\", \"(c)\")\n",
    "\n",
    "    outpath = os.path.join(OUT_DIR, f\"DV_{dv_label.replace(' ','_')}_H1H2H3_1row_v4.png\")\n",
    "    plt.savefig(outpath, dpi=600, bbox_inches=\"tight\")\n",
    "    plt.show()\n",
    "    plt.close()\n",
    "\n",
    "print(\"Done. Final figures saved to:\", OUT_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1d7a6e87-5bb8-4eb8-847f-476935893898",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "83c0c69b-0ea4-4fc6-9e5e-978721f7a2c8",
   "metadata": {},
   "source": [
    "## 2. Full results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0fde733e-12e1-4384-bfd7-19faeab054ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import statsmodels.formula.api as smf\n",
    "from IPython.display import display, Markdown\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8f407ac-4124-4aee-9215-9537be04bbf3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ============================================================\n",
    "# 0) PATH\n",
    "# ============================================================\n",
    "DATA_DIR = r\"E:\\\"\n",
    "DATA_FILE = os.path.join(DATA_DIR, \"df.xlsx\")\n",
    "\n",
    "df = pd.read_excel(DATA_FILE)\n",
    "\n",
    "# ============================================================\n",
    "# 1) Variables\n",
    "# ============================================================\n",
    "P = \"vignette_presence_P\"\n",
    "S = \"vignette_support_S\"\n",
    "\n",
    "DVs = {\n",
    "    \"Figure 1 (Legitimacy)\": \"dv_legitimacy_1to7\",\n",
    "    \"Figure 2 (Employer responsibility)\": \"dv_employer_responsibility_1to7\",\n",
    "    \"Figure 3 (Compensation support)\": \"dv_compensation_support_1to7\",\n",
    "    \"Figure 4 (Need for protection)\": \"dv_protection_need_1to7\",\n",
    "    \"Figure 5 (Social essentiality)\": \"dv_social_essentiality_1to7\"\n",
    "}\n",
    "\n",
    "# ---- Norm index\n",
    "norm_items = [\"mr_right_1to7\",\"mr_outdated_1to7\",\"mr_legit_1to7\",\"mr_no_reason_1to7\",\"mr_standard_1to7\"]\n",
    "if \"mr_perk_rev_1to7\" in df.columns:\n",
    "    df[\"mr_perk_rev_reversed\"] = 8 - df[\"mr_perk_rev_1to7\"]\n",
    "    norm_items += [\"mr_perk_rev_reversed\"]\n",
    "df[\"norm_index\"] = df[norm_items].mean(axis=1)\n",
    "df[\"norm_c\"] = df[\"norm_index\"] - df[\"norm_index\"].mean()\n",
    "\n",
    "# ---- Remote feasibility\n",
    "df[\"remote_feas\"] = df[\"job_remote_feas_0to10\"] / 10.0\n",
    "df[\"remote_feas\"] = df[\"remote_feas\"].fillna(df[\"remote_feas\"].mean())\n",
    "df[\"remote_c\"] = df[\"remote_feas\"] - df[\"remote_feas\"].mean()\n",
    "\n",
    "# ============================================================\n",
    "# 2) Controls (MAIN)\n",
    "# ============================================================\n",
    "CONTROL_TERMS = [\n",
    "    \"age\",\n",
    "    \"C(gender)\",\n",
    "    \"C(region)\",\n",
    "    \"C(education)\",\n",
    "    \"C(monthly_income_band)\",\n",
    "    \"C(homeownership)\",\n",
    "    \"C(marital_status)\",\n",
    "    \"C(union_member)\",\n",
    "    \"C(employment_type)\",\n",
    "    \"C(firm_size)\",\n",
    "    \"ideology_0to10\",\n",
    "]\n",
    "CONTROLS = \" + \".join(CONTROL_TERMS)\n",
    "\n",
    "# ============================================================\n",
    "# 3) Helpers\n",
    "# ============================================================\n",
    "def fit_ols(formula, data):\n",
    "    return smf.ols(formula, data=data).fit(cov_type=\"HC3\")\n",
    "\n",
    "def full_table(m):\n",
    "    \"\"\"Full coefficient table with HC3 robust SE and 95% CI.\"\"\"\n",
    "    out = pd.DataFrame({\n",
    "        \"term\": m.params.index,\n",
    "        \"coef\": m.params.values,\n",
    "        \"se_HC3\": m.bse.values,\n",
    "        \"t\": m.tvalues.values,\n",
    "        \"p\": m.pvalues.values\n",
    "    })\n",
    "    out[\"ci_low\"] = out[\"coef\"] - 1.96*out[\"se_HC3\"]\n",
    "    out[\"ci_high\"] = out[\"coef\"] + 1.96*out[\"se_HC3\"]\n",
    "    return out\n",
    "\n",
    "def add_sig_stars(p):\n",
    "    if p < 0.01: return \"***\"\n",
    "    if p < 0.05: return \"**\"\n",
    "    if p < 0.1: return \"*\"\n",
    "    return \"\"\n",
    "\n",
    "def pretty(df_in, digits=3):\n",
    "    df2 = df_in.copy()\n",
    "    df2[\"sig\"] = df2[\"p\"].apply(add_sig_stars)\n",
    "    for c in [\"coef\",\"se_HC3\",\"t\",\"ci_low\",\"ci_high\"]:\n",
    "        df2[c] = df2[c].map(lambda x: round(float(x), digits))\n",
    "    df2[\"p\"] = df2[\"p\"].map(lambda x: f\"{x:.3g}\")\n",
    "    return df2[[\"term\",\"coef\",\"se_HC3\",\"t\",\"p\",\"sig\",\"ci_low\",\"ci_high\"]]\n",
    "\n",
    "# Key results\n",
    "KEY_TERMS = [P, S, f\"{P}:{S}\", f\"{P}:norm_c\", f\"{P}:remote_c\"]\n",
    "\n",
    "def model_stats_line(m):\n",
    "    # statsmodels: nobs, rsquared, rsquared_adj\n",
    "    n = int(m.nobs)\n",
    "    r2 = m.rsquared\n",
    "    r2a = m.rsquared_adj\n",
    "    return f\"N = {n:,} | R² = {r2:.3f} | Adj. R² = {r2a:.3f}\"\n",
    "\n",
    "def show_model(title, m):\n",
    "    display(Markdown(f\"**{title}**  \\n{model_stats_line(m)}\"))\n",
    "    tbl = pretty(full_table(m))\n",
    "    display(tbl)\n",
    "\n",
    "def show_model_key_terms(title, m):\n",
    "    display(Markdown(f\"**{title}**  \\n{model_stats_line(m)}\"))\n",
    "    tbl = pretty(full_table(m))\n",
    "    tbl_key = tbl[tbl[\"term\"].isin(KEY_TERMS)].copy()\n",
    "    display(Markdown(f\"*Key terms (for quick reading):*\"))\n",
    "    display(tbl_key)\n",
    "\n",
    "# ============================================================\n",
    "# 4) Run + Display (15 models)\n",
    "# ============================================================\n",
    "\n",
    "for fig_name, dv in DVs.items():\n",
    "    display(Markdown(f\"## {fig_name}\"))\n",
    "    \n",
    "    # H1\n",
    "    f1 = f\"{dv} ~ {P}*{S} + {CONTROLS}\"\n",
    "    m1 = fit_ols(f1, df)\n",
    "    show_model_key_terms(\"Model H1 (baseline): DV ~ Presence*Support + controls\", m1)\n",
    "    show_model(\"Full results (H1)\", m1)\n",
    "\n",
    "    # H2\n",
    "    f2 = f\"{dv} ~ {P}*{S} + norm_c + {P}:norm_c + {CONTROLS}\"\n",
    "    m2 = fit_ols(f2, df)\n",
    "    show_model_key_terms(\"Model H2: DV ~ Presence*Support + Norm + Presence×Norm + controls\", m2)\n",
    "    show_model(\"Full results (H2)\", m2)\n",
    "\n",
    "    # H3\n",
    "    f3 = f\"{dv} ~ {P}*{S} + remote_c + {P}:remote_c + {CONTROLS}\"\n",
    "    m3 = fit_ols(f3, df)\n",
    "    show_model_key_terms(\"Model H3: DV ~ Presence*Support + RemoteFeas + Presence×RemoteFeas + controls\", m3)\n",
    "    show_model(\"Full results (H3)\", m3)\n",
    "\n",
    "display(Markdown(\"✅ **Done. All 15 full regression tables are displayed above.**\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "604e88df-35ff-4aa9-9813-c55e2f7ad92a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fd210266-d219-44fd-9d0d-149957158e7d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33fc21cb-d309-453f-bc7c-c6df5ffd8f0c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
